package com.outsider.util.pageprocessor;



import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.eclipse.jetty.util.UrlEncoded;

import com.outsider.util.Const;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that reads the first result ({@code id="result_0"})
 * of an Amazon.cn result page and publishes four fields on the page:
 * {@code name}, {@code price}, {@code image} and {@code url}.
 *
 * <p>If the page has no {@code result_0} element, no field is published.
 * Any individual field whose markup is missing is published as {@code null}
 * instead of aborting the whole page.
 */
public class AM_SearchPageprocessor implements PageProcessor{
	/** Prefix to which the extracted {@code data-asin} value is appended to build the item URL. */
	private static final String AM_URL="http://www.amazon.cn/gp/aw/d/";
	/** XPath of the first search result; used both as presence check and as source of the ASIN. */
	private static final String FIRST_RESULT_XPATH="//*[@id=\"result_0\"]";

	private final Site site = Site.me().setUserAgent(Const.USER_AGENT).setSleepTime(1).setRetryTimes(3);

	/**
	 * Extracts name, price, image and URL of the first result and stores them via
	 * {@link Page#putField(String, Object)}.
	 *
	 * @param page the downloaded page to extract from
	 */
	@Override
	public void process(Page page) {
		// Nothing to extract when the page carries no first result.
		if (page.getHtml().xpath(FIRST_RESULT_XPATH).toString()==null) {
			return;
		}

		String name=extract(page,
				"//*[@class=\"a-link-normal\" and @class=\"s-access-detail-page\" and @class=\"a-text-normal\"]",
				"title=\"(.*?)\"");
		// The predicate is now terminated with ']'; the original expression left it open.
		String image=extract(page,
				"//*[@class=\"a-link-normal\" and @class=\"a-text-normal\" and @target=\"_blank\"]",
				"src=\"(.*?)\"");

		String priceText=page.getHtml()
				.xpath("//*[@class=\"a-size-base\" and @class=\"a-color-price\" and @class=\"s-price\" and @class=\"a-text-bold\"]/text()")
				.toString();
		// The first character is dropped (presumably the currency symbol - confirm against live markup).
		// A missing or empty price yields null rather than an exception.
		String price=(priceText==null || priceText.isEmpty()) ? null : priceText.substring(1);

		// Without an ASIN there is no meaningful item URL; avoid emitting ".../null".
		String asin=extract(page, FIRST_RESULT_XPATH, "data-asin=\"(.*?)\"");
		String url=(asin==null) ? null : AM_URL+asin;

		page.putField("name", name);
		page.putField("price", price);
		page.putField("image", image);
		page.putField("url", url);
		System.out.println("AM:"+new SimpleDateFormat(Const.DATE_FORMAT).format(new Date()));
	}

	/**
	 * Selects nodes by XPath, applies a regex to the selection and returns the
	 * string form of the result (which the caller must expect to be {@code null}
	 * when nothing matched).
	 */
	private static String extract(Page page, String xpath, String regex) {
		return page.getHtml().xpath(xpath).regex(regex).toString();
	}

	/**
	 * @return the crawl settings (user agent, sleep time, retry count) used by this processor
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Manual smoke run: feeds a single search URL with a fixed keyword to the spider.
	 */
	public static void main(String args[]) {
		Spider.create(new AM_SearchPageprocessor()).test("https://www.amazon.cn/s/field-keywords="+UrlEncoded.encodeString("gafafdsafads"));
	}

}
